import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows() # Disable 5_000 rows limit
data = pd.read_csv("https://github.com/Krow10/code4rena-scraper/raw/master/leaderboard_code4rena.csv") # Set path accordingly
data
| period | handle | is_team | prize_money | total_reports | high_all | high_solo | med_all | med_solo | gas_all | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | LAST 60 DAYS | Soosh | False | 70985.01 | 1 | 0 | 0 | 1 | 1 | 0 |
| 1 | LAST 60 DAYS | HE1M | False | 40620.85 | 20 | 6 | 0 | 11 | 1 | 0 |
| 2 | LAST 60 DAYS | codehacker | False | 24571.73 | 1 | 0 | 0 | 1 | 0 | 0 |
| 3 | LAST 60 DAYS | Trust | False | 22909.46 | 31 | 9 | 0 | 19 | 3 | 0 |
| 4 | LAST 60 DAYS | cccz | False | 20523.95 | 17 | 5 | 1 | 10 | 2 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2286 | ALL TIME | alexxander | False | 0.00 | 1 | 0 | 0 | 1 | 0 | 0 |
| 2287 | ALL TIME | navinavu | False | 0.00 | 1 | 0 | 0 | 1 | 0 | 0 |
| 2288 | ALL TIME | Kumpirmafyas | False | 0.00 | 2 | 0 | 0 | 0 | 0 | 0 |
| 2289 | ALL TIME | 0xMesaj | False | 0.00 | 1 | 0 | 0 | 0 | 0 | 0 |
| 2290 | ALL TIME | ad3sh_ | False | 0.00 | 2 | 0 | 0 | 0 | 0 | 0 |
2291 rows × 10 columns
plt_data = data.copy()
Assumes missing reports from total_reports are QA reports
plt_data["qa_all"] = plt_data["total_reports"] - (plt_data["high_all"] + plt_data["med_all"] + plt_data["gas_all"])
plt_data = plt_data.melt(id_vars=["period", "prize_money"], value_vars=list(plt_data.columns[3:]), var_name="type")
plt_data["type"] = plt_data["type"].map(
{
'gas_all': 'Gas optimization',
'high_all': 'Common high',
'high_solo': 'Unique high',
'med_all': 'Common medium',
'med_solo': 'Unique medium',
'qa_all': 'QA',
'total_reports': 'Total reports'
}
)
Filter from which period the data should be taken. These are the same options as the leaderboard entries from the Code4rena website.
period_dropdown = alt.binding_select(options=list(plt_data["period"].unique()), name='Select a period:')
period_selector = alt.selection_single(
fields=['period'],
bind=period_dropdown,
name="period_selector",
init={'period':'ALL TIME'}
)
ordered_legend_reports_labels = [
'Unique high',
'Common high',
'Unique medium',
'Common medium',
'Gas optimization',
'QA',
'Total reports'
]
label_colors = ["#FE266D","#FA6C44","#F2E713","#D1D811","#0AB6F8","#5688C1","#AA3678"]
base = alt.Chart(plt_data).transform_calculate(
order="{ \
'Total reports': 0, \
'QA': 1, \
'Gas optimization': 2, \
'Common medium': 3, \
'Unique medium': 4, \
'Common high': 5, \
'Unique high': 6}[datum.risk]"
).mark_circle().encode(
x=alt.X('value:Q', scale=alt.Scale(type='symlog'), title='Number of reports'),
y=alt.Y('prize_money:Q', title='Prize money ($USD)', scale=alt.Scale(domainMin=0)),
color=alt.Color(
'type:N',
title='Report type',
scale=alt.Scale(domain=ordered_legend_reports_labels, range=label_colors),
legend=alt.Legend(
title='Reports type',
values=ordered_legend_reports_labels,
orient="top",
labelFontSize=14,
symbolStrokeWidth=10
)
),
order='order:O'
).properties(
width=850,
height=400
).add_selection(
period_selector
).transform_filter(
period_selector
)
For a given period, shows the correlation (via polynomial regression) between the number of reports and the prize money of a warden for each report category.
Note that the x-axis is a logarithmic scale.
base.transform_regression(
'value',
'prize_money',
method='poly',
groupby=['type']
).mark_line().configure_axis(titleFontSize=14, labelFontSize=12)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. for col_name, dtype in df.dtypes.iteritems():
df = pd.read_csv('https://raw.githubusercontent.com/code-423n4/code423n4.com/main/_data/findings/findings.csv')
ordered_risk_labels = ['0', '5_QA', '4_Gas optimization', '3_Low', '2_Med', '1_High']
original = ['0', 'q', 'g', '1', '2', '3']
risk_labels = dict(zip(original, ordered_risk_labels))
df['risk'] = df['risk'].map(risk_labels)
chart = alt.Chart(width=800, height=400).transform_filter(
(alt.datum.split == 1) & (alt.datum.risk != '0')
).mark_bar(
).encode(
y=alt.Y("count()", title=''),
color=alt.Color(
'risk:N',
scale=alt.Scale(scheme='yelloworangered',
domain=ordered_risk_labels[1:])
)
).resolve_scale(y='independent')
Set to round the award money ranges to the nearest multiple of this variable
round_base = 10
steps = []
for _risk in ordered_risk_labels[::-1]:
awards = df[(df.risk == _risk) & (df.split == 1) & (df.awardUSD != 0)].awardUSD
steps.append(round_base * round((awards.min() + awards.max() / awards.count()) / round_base))
The bar graphs represents the number of award money for unique findings falling into the range indicated on the x-axis, for each report severity category.
Note that the x-axis scales are different for each graph, watch carefully.
PS: the labels are prefixed with a number only for the purpose of ordering the charts and doesn't represent the 'risk' present in the original data (i.e. high=3, med=2, low=1, etc.).
alt.vconcat(
*(chart.transform_filter(
alt.datum.risk == _risk
).encode(
x=alt.X(
"awardUSD:Q",
bin=alt.Bin(step=_step),
title='Award money ($USD)',
axis=alt.Axis(format='$,.0f')
),
tooltip=['risk:N', 'count():Q']
) for (_risk, _step) in list(zip(ordered_risk_labels[1:][::-1], steps))),
data=df
)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. for col_name, dtype in df.dtypes.iteritems():
df_contests = pd.read_csv('../contests_code4rena.csv')
df_contests.head(1)
| contest_report_repo | contest_sponsor | contest_desc | start | end | prize_pool | handle | prize_money | total_reports | high_all | high_solo | med_all | med_solo | gas_all | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | lsdnetwork-stakehouse | A permissionless 3 pool liquid staking solutio... | 11 November 2022 | 18 November 2022 | 75000 | Uunforgiven | 10860.28 | 10 | 8 | 3 | 2 | 0 | 0 |
df_github = pd.read_csv('../github_code4rena.csv')
df_github.head(1)
| contest | contest_sponsor | date | handle | address | risk | title | issueId | issueUrl | tags | issueCreation | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 94 | foundation | 2022/02 | bobi | 0x8F24B7bEe60c5421101e2e596b1008522368920F | Q | QA Report | 70 | https://github.com/code-423n4/2022-02-foundati... | bug;invalid;QA (Quality Assurance);sponsor dis... | 2022-03-02T21:26:45Z |
df_merge = pd.merge(df_contests[['contest_sponsor', 'prize_pool']], df_github[['contest_sponsor', 'contest']], how='outer', on='contest_sponsor')
df_merge = df_merge.dropna().reset_index(drop=True)
df_merge['contest'] = df_merge['contest'].astype(int)
df_merge.head(1)
| contest_sponsor | prize_pool | contest | |
|---|---|---|---|
| 0 | holograph | 75000.0 | 170 |
df_merge = df_merge[['contest', 'prize_pool']].drop_duplicates().reset_index(drop=True)
df = pd.merge(df, df_merge[['contest', 'prize_pool']], how='outer', on='contest')
df = df[df['prize_pool'].notna()].reset_index(drop=True)
box_size = 25
boxplot_chart = alt.Chart(width=800, height=400).transform_filter(
(alt.datum.split == 1) & (alt.datum.risk != '0') & (alt.datum.awardUSD != 0) # Remove '0' labels and 0 award values
).mark_boxplot(
extent='min-max',
size=box_size,
).encode(
x=alt.X(
'prize_pool:O',
axis=alt.Axis(title='Prize pool ($USD)', titleAngle=0, labelAngle=-45)
),
y=alt.Y(
'awardUSD:Q',
axis=alt.Axis(format='$,.0f', title='Award money ($USD)')
),
color=alt.Color(
'risk:N',
scale=alt.Scale(scheme='yelloworangered', domain=ordered_risk_labels[1:]),
legend=alt.Legend(title='Risk level')
),
)
count_prize_pool = alt.Chart(width=800, height=400).transform_filter(
(alt.datum.split == 1) & (alt.datum.risk != '0') & (alt.datum.awardUSD != 0) # Remove '0' labels and 0 award values
).mark_bar(
opacity=0.1
).encode(
x=alt.X(
'prize_pool:O',
axis=alt.Axis(title='Prize pool ($USD)', titleAngle=0, labelAngle=-45)
),
y=alt.Y(
'distinct(contest):Q',
title='Number of contests',
axis=alt.Axis(tickMinStep=1)
),
color=alt.Color(
'risk:N',
scale=alt.Scale(scheme='yelloworangered', domain=ordered_risk_labels[1:]),
legend=None
),
tooltip=['distinct(contest):Q']
)
#df[(df.split == 1) & (df.risk != '0') & (df.awardUSD != 0)].groupby('prize_pool')['contest'].nunique()
#count_prize_pool
The boxplots (right y-axis) indicates the distribution of award money for unique findings by prize pool, for each severity report level.
The bar charts in the background (left y-axis) indicates the number of contests grouped by prize pool, meaning the size of the data from which the distributions are calculated.
Note that the y-axis scales are different for each graph.
alt.vconcat(
*(alt.layer(count_prize_pool, boxplot_chart).resolve_scale(
y='independent',
color='independent'
).transform_filter(alt.datum.risk == _risk) for _risk in ordered_risk_labels[1:][::-1]),
data=df
)
/opt/hostedtoolcache/Python/3.11.1/x64/lib/python3.11/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. for col_name, dtype in df.dtypes.iteritems():